# install.packages(c("gridExtra", "ggplot2"))
library(gridExtra)
library(ggplot2)

# Root directory of the replication material.
# NOTE: A sub-directory within this root directory should be ./SimStudyData
# which should include csv output by SimStudy1.R, SimStudy2.R, SimStudy3A.R, and SimStudy3B.R
# If replicating with the pre-simulated data, simply unzip SimStudyData.zip from
# the replication folder.
workingDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"

# Directory to output figures
outputDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"
setwd(workingDirectory)

# A custom ggplot theme
my.theme <- function(base_size = 11, base_family = "",
                     grid.x_colour = NA, grid.y_colour = NA,
                     grid.x_linetype = 1, grid.y_linetype = 1,
                     strip_colour = "grey15", strip_text = "black",
                     background_colour = "transparent",
                     tick_colour = "black",
                     borderless = 0, bordersize = 0.5) { 
  if(is.na(grid.x_colour)) grid.x_colour <- "transparent"
  if(is.na(grid.y_colour)) grid.y_colour <- "transparent"
  if(borderless == 2) {
    border <- theme(panel.border = element_blank(),
                    panel.background = element_blank(),
                    strip.background = element_blank())
  }

  else if(borderless == 1) {
    border <- theme(axis.line = element_line(colour = "black", size = 0.25),
                    panel.border = element_blank(),
                    panel.background = element_blank(),
                    strip.background = element_blank())
  }
  else if(borderless == 0) border <- theme()
    theme( 
      axis.text.x       = element_text(family = base_family, colour = "grey15", size = base_size, vjust = 1, lineheight = 0.9),
      axis.text.y       = element_text(family = base_family, colour = "grey15", size = base_size, hjust = 1, lineheight = 0.9),
      axis.ticks        = element_line(colour = tick_colour, size = 0.2),
      axis.ticks.length = unit(0.25, "lines"),
      legend.background = element_blank(),
      legend.key = element_blank(),
      legend.key.size = unit(0.6, "lines"),
      legend.text = element_text(family = base_family, size = base_size, face = "plain", lineheight = 1),
      legend.text.align = 0, 
      legend.title = element_text(family = base_family, size = base_size, face = "plain", hjust = 0),
      legend.title.align = 0,
      legend.position = "top",
      legend.direction = "horizontal",
      legend.justification = c(1, 1),
      panel.background = element_rect(fill = "white"),
      panel.border = element_rect(fill = "transparent", colour = "black", size = bordersize),
      panel.grid.major.x = element_line(colour = grid.x_colour, size = 0.15, linetype = grid.x_linetype),
      panel.grid.major.y = element_line(colour = grid.y_colour, size = 0.15, linetype = grid.y_linetype),
      panel.grid.minor = element_blank(),
      panel.spacing = unit(0.75, "lines"),
      strip.background = element_blank(),
      strip.text.x = element_text(family = base_family, size = base_size, face = "plain", colour = strip_text),
      strip.text.y = element_text(family = base_family, size = base_size, face = "plain", angle = -90, colour = strip_text),
      plot.background = element_rect(fill = background_colour, colour = background_colour),
      plot.title = element_text(hjust = 0.5, family = base_family, size = base_size * 1.1, vjust = 0, face = "bold")
    ) +
    border
}

# Root mean squared error
getRMSE <- function(x) sqrt(mean((mean(x) - x)^2))


########## SIMULATION STUDY 1

# Load data
dataList <- sort(grep("Sim1", list.files(path = "SimStudyData/"), ignore.case = FALSE, value = TRUE), decreasing = TRUE)
SS1 <- do.call(rbind, lapply(dataList, function(x) read.csv(paste0("SimStudyData/", x), stringsAsFactors = FALSE)))

SS1$par.sensitive.lower <- SS1$par.sensitive - SS1$se.sensitive * 1.645
SS1$par.sensitive.upper <- SS1$par.sensitive + SS1$se.sensitive * 1.645

G1 <- data.frame(proportion = c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, NA),
                 model = c(rep("Proposed estimator", 9), "Standard estimator"),
                 rmse = NA, bias = NA, coverage = NA)

for(p in c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)) {
  G1$rmse[G1$proportion == p] <- getRMSE(SS1$par.sensitive[SS1$proportion == p & SS1$model == "Proposed estimator"])
  G1$bias[G1$proportion == p] <- mean(0 - SS1$par.sensitive[SS1$proportion == p & SS1$model == "Proposed estimator"])
  G1$coverage[G1$proportion == p] <- prop.table(table(SS1$par.sensitive.lower[SS1$proportion == p & SS1$model == "Proposed estimator"] < 0 &
                                                      SS1$par.sensitive.upper[SS1$proportion == p & SS1$model == "Proposed estimator"] > 0))[2]
}

G1$rmse[G1$model == "Standard estimator"] <- getRMSE(SS1$par.sensitive[SS1$model == "Standard estimator"])
G1$bias[G1$model == "Standard estimator"] <- mean(0 - SS1$par.sensitive[SS1$model == "Standard estimator"])
G1$coverage[G1$model == "Standard estimator"] <- prop.table(table(SS1$par.sensitive.lower[SS1$model == "Standard estimator"] < 0 &
                                                                  SS1$par.sensitive.upper[SS1$model == "Standard estimator"] > 0))[2]


graphRMSE <- ggplot(subset(G1, model == "Proposed estimator"), aes(x = proportion, y = rmse)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  labs(x = "Proportion that misreport", y = "", title = "\nRMSE") +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.0015, 0.2015), expand = FALSE) +
  scale_x_continuous(breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(breaks = c(0, 0.05, 0.1, 0.15, 0.20), labels = c("0", "", "0.1", "", "0.2")) +
  geom_hline(yintercept = G1$rmse[G1$model == "Standard estimator"], linetype = 5, size = 0.35) +
  geom_line(size = 0.35) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 0.5, -0.75), "lines"))

graphBias <- ggplot(subset(G1, model == "Proposed estimator"), aes(x = proportion, y = bias)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_x_continuous(breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  labs(x = "Proportion that misreport", y = "", title = "\nBias") +
  geom_hline(yintercept = 0, linetype = 3) +
  geom_hline(yintercept = G1$bias[G1$model == "Standard estimator"], linetype = 5, size = 0.35) +
  geom_line(size = 0.35) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 0.5, -0.75), "lines"))

graphCoverage <- ggplot(subset(G1, model == "Proposed estimator"), aes(x = proportion, y = coverage)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(0.799, 1.001), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(0.803, 1.003), breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Proportion that misreport", y = "", title = "Coverage of 90%\nconfidence interval") +
  geom_hline(yintercept = 0.9, linetype = 3) +
  geom_hline(yintercept = G1$coverage[G1$model == "Standard estimator"], linetype = 5, size = 0.35) +
  geom_line(size = 0.35) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 0.5, -0.75), "lines"))

# FIGURE 1
pdf(paste0(outputDirectory, "/EADY-FIGURE1-SimStudy1-raw.pdf"), 6, 2.3)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 3)
dev.off()


########## SIMULATION STUDY 2

param.control <- c("(Intercept)" = -0.25,
                   "continuous" = 0.25,
                   "binary" = -0.5,
                   "U" = 0,
                   "Z" = 0.5)

param.sensitive <- c("(Intercept)" = 0,
                     "continuous" = 0.25,
                     "binary" = -0.5)

param.misreport <- c("(Intercept)" = 0.5,
                     "continuous" = -0.5,
                     "binary" = 0.25)

# Load data
dataList <- sort(grep("Sim2", list.files(path = "SimStudyData/"), ignore.case = FALSE, value = TRUE), decreasing = TRUE)
SS2 <- do.call(rbind, lapply(dataList, function(x) read.csv(paste0("SimStudyData/", x), stringsAsFactors = FALSE)))

SS2$par.control.intercept.lower <- SS2$par.control.intercept - SS2$se.control.intercept * 1.645
SS2$par.control.intercept.upper <- SS2$par.control.intercept + SS2$se.control.intercept * 1.645
SS2$par.control.binary.lower <- SS2$par.control.binary - SS2$se.control.binary * 1.645
SS2$par.control.binary.upper <- SS2$par.control.binary + SS2$se.control.binary * 1.645
SS2$par.control.continuous.lower <- SS2$par.control.continuous - SS2$se.control.continuous * 1.645
SS2$par.control.continuous.upper <- SS2$par.control.continuous + SS2$se.control.continuous * 1.645
SS2$par.control.z.lower <- SS2$par.control.z - SS2$se.control.z * 1.645
SS2$par.control.z.upper <- SS2$par.control.z + SS2$se.control.z * 1.645

SS2$par.sensitive.intercept.lower <- SS2$par.sensitive.intercept - SS2$se.sensitive.intercept * 1.645
SS2$par.sensitive.intercept.upper <- SS2$par.sensitive.intercept + SS2$se.sensitive.intercept * 1.645
SS2$par.sensitive.binary.lower <- SS2$par.sensitive.binary - SS2$se.sensitive.binary * 1.645
SS2$par.sensitive.binary.upper <- SS2$par.sensitive.binary + SS2$se.sensitive.binary * 1.645
SS2$par.sensitive.continuous.lower <- SS2$par.sensitive.continuous - SS2$se.sensitive.continuous * 1.645
SS2$par.sensitive.continuous.upper <- SS2$par.sensitive.continuous + SS2$se.sensitive.continuous * 1.645

SS2$par.misreport.intercept.lower <- SS2$par.misreport.intercept - SS2$se.misreport.intercept * 1.645
SS2$par.misreport.intercept.upper <- SS2$par.misreport.intercept + SS2$se.misreport.intercept * 1.645
SS2$par.misreport.binary.lower <- SS2$par.misreport.binary - SS2$se.misreport.binary * 1.645
SS2$par.misreport.binary.upper <- SS2$par.misreport.binary + SS2$se.misreport.binary * 1.645
SS2$par.misreport.continuous.lower <- SS2$par.misreport.continuous - SS2$se.misreport.continuous * 1.645
SS2$par.misreport.continuous.upper <- SS2$par.misreport.continuous + SS2$se.misreport.continuous * 1.645

G2A <- data.frame(model = rep(c("Standard estimator", "Proposed estimator"), each = 9),
                  variable = rep(rep(c("Intercept", "Binary", "Continuous"), each = 3), 2),
                  n = rep(c(2000, 3000, 5000), 6),
                  rmse = NA, bias = NA, coverage = NA)

for(n in c(2000, 3000, 5000)) {
  for(model in c("Standard estimator", "Proposed estimator")) {
    G2A$rmse[G2A$n == n & G2A$model == model & G2A$variable == "Intercept"] <-
      getRMSE(SS2$par.sensitive.intercept[SS2$n == n & SS2$model == model])
    G2A$rmse[G2A$n == n & G2A$model == model & G2A$variable == "Binary"] <-
      getRMSE(SS2$par.sensitive.binary[SS2$n == n & SS2$model == model])
    G2A$rmse[G2A$n == n & G2A$model == model & G2A$variable == "Continuous"] <-
      getRMSE(SS2$par.sensitive.continuous[SS2$n == n & SS2$model == model])

    G2A$bias[G2A$n == n & G2A$model == model & G2A$variable == "Intercept"] <-
      mean(param.sensitive["(Intercept)"] - SS2$par.sensitive.intercept[SS2$n == n & SS2$model == model])
    G2A$bias[G2A$n == n & G2A$model == model & G2A$variable == "Binary"] <-
      mean(param.sensitive["binary"] - SS2$par.sensitive.binary[SS2$n == n & SS2$model == model])
    G2A$bias[G2A$n == n & G2A$model == model & G2A$variable == "Continuous"] <-
      mean(param.sensitive["continuous"] - SS2$par.sensitive.continuous[SS2$n == n & SS2$model == model])

    G2A$coverage[G2A$n == n & G2A$model == model & G2A$variable == "Intercept"] <-
      prop.table(table(SS2$par.sensitive.intercept.lower[SS2$n == n & SS2$model == model] < param.sensitive["(Intercept)"] &
                       SS2$par.sensitive.intercept.upper[SS2$n == n & SS2$model == model] > param.sensitive["(Intercept)"]))[2]
    G2A$coverage[G2A$n == n & G2A$model == model & G2A$variable == "Binary"] <-
      prop.table(table(SS2$par.sensitive.binary.lower[SS2$n == n & SS2$model == model] < param.sensitive["binary"] &
                       SS2$par.sensitive.binary.upper[SS2$n == n & SS2$model == model] > param.sensitive["binary"]))[2]
    G2A$coverage[G2A$n == n & G2A$model == model & G2A$variable == "Continuous"] <-
      prop.table(table(SS2$par.sensitive.continuous.lower[SS2$n == n & SS2$model == model] < param.sensitive["continuous"] &
                       SS2$par.sensitive.continuous.upper[SS2$n == n & SS2$model == model] > param.sensitive["continuous"]))[2]
  }
}

G2A$variable <- factor(G2A$variable, levels = c("Intercept", "Binary", "Continuous"))
G2A$model <- factor(G2A$model, levels = c("Standard estimator", "Proposed estimator"))

graphRMSE <- ggplot(G2A, aes(x = n, y = rmse, colour = model, fill = model, shape = model, size = model)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.03, 0.43), expand = FALSE) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5), labels = c("0", "0.1", "0.2", "0.3", "0.4", "0.5")) +
  labs(x = "Sample size", y = "RMSE", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphBias <- ggplot(G2A, aes(x = n, y = bias, colour = model, fill = model, shape = model, size = model)) + 
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_y_continuous(breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  labs(x = "Sample size", y = "Bias", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphCoverage <- ggplot(G2A, aes(x = n, y = coverage, colour = model, fill = model, size = model, shape = model)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(0.7985, 1.0015), expand = FALSE) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  scale_y_continuous(breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Sample size", y = "Coverage of 90% CI", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0.9, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

# FIGURE 2
# Edited to 5.5, 5.75
pdf(paste0(outputDirectory, "/EADY-FIGURE2-SimStudy2-Sensitive-raw.pdf"), 5.5, 6.5)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 1)
dev.off()


# CONTROL

G2B <- data.frame(model = rep(c("Standard estimator", "Proposed estimator"), each = 12),
                  variable = rep(rep(c("Intercept", "Binary", "Continuous", "Z"), each = 3), 2),
                  n = rep(c(2000, 3000, 5000), 8),
                  rmse = NA, bias = NA, coverage = NA)

for(n in c(2000, 3000, 5000)) {
  for(model in c("Standard estimator", "Proposed estimator")) {
    G2B$rmse[G2B$n == n & G2B$model == model & G2B$variable == "Intercept"] <-
      getRMSE(SS2$par.control.intercept[SS2$n == n & SS2$model == model])
    G2B$rmse[G2B$n == n & G2B$model == model & G2B$variable == "Binary"] <-
      getRMSE(SS2$par.control.binary[SS2$n == n & SS2$model == model])
    G2B$rmse[G2B$n == n & G2B$model == model & G2B$variable == "Continuous"] <-
      getRMSE(SS2$par.control.continuous[SS2$n == n & SS2$model == model])
    G2B$rmse[G2B$n == n & G2B$model == model & G2B$variable == "Z"] <-
      getRMSE(SS2$par.control.z[SS2$n == n & SS2$model == model])

    G2B$bias[G2B$n == n & G2B$model == model & G2B$variable == "Intercept"] <-
      mean(param.control["(Intercept)"] - SS2$par.control.intercept[SS2$n == n & SS2$model == model])
    G2B$bias[G2B$n == n & G2B$model == model & G2B$variable == "Binary"] <-
      mean(param.control["binary"] - SS2$par.control.binary[SS2$n == n & SS2$model == model])
    G2B$bias[G2B$n == n & G2B$model == model & G2B$variable == "Continuous"] <-
      mean(param.control["continuous"] - SS2$par.control.continuous[SS2$n == n & SS2$model == model])
    G2B$bias[G2B$n == n & G2B$model == model & G2B$variable == "Z"] <-
      mean(param.control["Z"] - SS2$par.control.z[SS2$n == n & SS2$model == model])

    G2B$coverage[G2B$n == n & G2B$model == model & G2B$variable == "Intercept"] <-
      prop.table(table(SS2$par.control.intercept.lower[SS2$n == n & SS2$model == model] < param.control["(Intercept)"] &
                       SS2$par.control.intercept.upper[SS2$n == n & SS2$model == model] > param.control["(Intercept)"]))[2]
    G2B$coverage[G2B$n == n & G2B$model == model & G2B$variable == "Binary"] <-
      prop.table(table(SS2$par.control.binary.lower[SS2$n == n & SS2$model == model] < param.control["binary"] &
                       SS2$par.control.binary.upper[SS2$n == n & SS2$model == model] > param.control["binary"]))[2]
    G2B$coverage[G2B$n == n & G2B$model == model & G2B$variable == "Continuous"] <-
      prop.table(table(SS2$par.control.continuous.lower[SS2$n == n & SS2$model == model] < param.control["continuous"] &
                       SS2$par.control.continuous.upper[SS2$n == n & SS2$model == model] > param.control["continuous"]))[2]
    G2B$coverage[G2B$n == n & G2B$model == model & G2B$variable == "Z"] <-
      prop.table(table(SS2$par.control.z.lower[SS2$n == n & SS2$model == model] < param.control["Z"] &
                       SS2$par.control.z.upper[SS2$n == n & SS2$model == model] > param.control["Z"]))[2]
  }
}

G2B$variable <- as.character(G2B$variable)
G2B$variable[G2B$variable == "Z"] <- "Z*"
G2B$variable <- factor(G2B$variable, levels = c("Intercept", "Binary", "Continuous", "Z*"))
G2B$model <- factor(G2B$model, levels = c("Standard estimator", "Proposed estimator"))

graphRMSE <- ggplot(G2B, aes(x = n, y = rmse, colour = model, fill = model, shape = model, size = model)) +
  my.theme(borderless = 0, base_size = 10, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 5) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.01, 0.121), expand = FALSE) +
  scale_x_continuous(limits = c(1800, 5200), breaks = c(2000, 3000, 5000), labels = c("2000", "3000", "5000")) +
  scale_y_continuous(limits = c(-0.01, 0.16), breaks = c(0, 0.04, 0.08, 0.12), labels = c("0", "0.04", "0.08", "0.12")) +
  labs(x = "Sample size", y = "RMSE", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2.25) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphBias <- ggplot(G2B, aes(x = n, y = bias, colour = model, fill = model, shape = model, size = model)) + 
  my.theme(borderless = 0, base_size = 10, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 5) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_y_continuous(breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  labs(x = "Sample size", y = "Bias", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2.25) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphCoverage <- ggplot(G2B, aes(x = n, y = coverage, colour = model, fill = model, size = model, shape = model)) +
  my.theme(borderless = 0, base_size = 10, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 5) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(0.7985, 1.0015), expand = FALSE) +
  scale_x_continuous(breaks = c(2000, 3000, 5000), labels = c("2000", "3000", "5000")) +
  scale_y_continuous(breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Sample size", y = "Coverage of 90% CI", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0.9, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2.25) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

# APPENDIX: FIGURE A1
# Edited to 5.5, 5.75
pdf(paste0(outputDirectory, "/EADY-FIGUREA1-SimStudy2-Control-raw.pdf"), 7, 6.5)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 1)
dev.off()


# MISREPORTING

G2C <- data.frame(model = rep("Proposed estimator", 9),
                  variable = rep(c("Intercept", "Binary", "Continuous"), each = 3),
                  n = rep(c(2000, 3000, 5000), 3),
                  rmse = NA, bias = NA, coverage = NA)

for(n in c(2000, 3000, 5000)) {
    G2C$rmse[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Intercept"] <-
      getRMSE(SS2$par.misreport.intercept[SS2$n == n & SS2$model == "Proposed estimator"])
    G2C$rmse[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Binary"] <-
      getRMSE(SS2$par.misreport.binary[SS2$n == n & SS2$model == "Proposed estimator"])
    G2C$rmse[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Continuous"] <-
      getRMSE(SS2$par.misreport.continuous[SS2$n == n & SS2$model == "Proposed estimator"])

    G2C$bias[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Intercept"] <-
      mean(param.misreport["(Intercept)"] - SS2$par.misreport.intercept[SS2$n == n & SS2$model == "Proposed estimator"])
    G2C$bias[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Binary"] <-
      mean(param.misreport["binary"] - SS2$par.misreport.binary[SS2$n == n & SS2$model == "Proposed estimator"])
    G2C$bias[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Continuous"] <-
      mean(param.misreport["continuous"] - SS2$par.misreport.continuous[SS2$n == n & SS2$model == "Proposed estimator"])

    G2C$coverage[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Intercept"] <-
      prop.table(table(SS2$par.misreport.intercept.lower[SS2$n == n & SS2$model == "Proposed estimator"] < param.misreport["(Intercept)"] &
                       SS2$par.misreport.intercept.upper[SS2$n == n & SS2$model == "Proposed estimator"] > param.misreport["(Intercept)"]))[2]
    G2C$coverage[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Binary"] <-
      prop.table(table(SS2$par.misreport.binary.lower[SS2$n == n & SS2$model == "Proposed estimator"] < param.misreport["binary"] &
                       SS2$par.misreport.binary.upper[SS2$n == n & SS2$model == "Proposed estimator"] > param.misreport["binary"]))[2]
    G2C$coverage[G2C$n == n & G2C$model == "Proposed estimator" & G2C$variable == "Continuous"] <-
      prop.table(table(SS2$par.misreport.continuous.lower[SS2$n == n & SS2$model == "Proposed estimator"] < param.misreport["continuous"] &
                       SS2$par.misreport.continuous.upper[SS2$n == n & SS2$model == "Proposed estimator"] > param.misreport["continuous"]))[2]
}

G2C$variable <- factor(G2C$variable, levels = c("Intercept", "Binary", "Continuous", "Z"))
G2C$model <- factor(G2C$model, levels = c("Standard estimator", "Proposed estimator"))

graphRMSE <- ggplot(G2C, aes(x = n, y = rmse, colour = model, fill = model, shape = model, size = model)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.03, 0.33), expand = FALSE) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5), labels = c("0", "0.1", "0.2", "0.3", "0.4", "0.5")) +
  labs(x = "Sample size", y = "RMSE", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphBias <- ggplot(G2C, aes(x = n, y = bias, colour = model, fill = model, shape = model, size = model)) + 
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_y_continuous(breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  labs(x = "Sample size", y = "Bias", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphCoverage <- ggplot(G2C, aes(x = n, y = coverage, colour = model, fill = model, size = model, shape = model)) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  facet_wrap(~ variable, scales = "free_x", ncol = 3) +
  coord_cartesian(xlim = c(1800, 5200), ylim = c(0.7985, 1.0015), expand = FALSE) +
  scale_x_continuous(breaks = c(2000, 3000, 5000)) +
  scale_y_continuous(breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Sample size", y = "Coverage of 90% CI", colour = "", fill = "", shape = "", size = "", title = "") +
  geom_hline(yintercept = 0.9, size = 0.25, linetype = 2) +
  geom_line(size = 0.25, show.legend = FALSE) +
  geom_point(size = 2) +
  scale_shape_manual(values = c("Standard estimator" = 21, "Proposed estimator" = 21)) +
  scale_colour_manual(values = c("Standard estimator" = "black", "Proposed estimator" = "black")) +
  scale_fill_manual(values = c("Standard estimator" = "white", "Proposed estimator" = "black")) +
  scale_size_manual(values = c("Standard estimator" = 2, "Proposed estimator" = 2)) +
  theme(legend.position = "none",
        legend.background = element_blank(),
        plot.margin = unit(c(-0.5, 0.35, 0.25, 0.25), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

# APPENDIX: FIGURE A2
# Edited to 5.5, 5.75
pdf(paste0(outputDirectory, "/EADY-FIGUREA2-SimStudy2-Misreport-raw.pdf"), 5.5, 6.5)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 1)
dev.off()


########## SIMULATION STUDY 3A

dataList <- sort(grep("Sim3A", list.files(path = "SimStudyData/"), ignore.case = FALSE, value = TRUE), decreasing = TRUE)
SS3A <- do.call(rbind, lapply(dataList, function(x) read.csv(paste0("SimStudyData/", x), stringsAsFactors = FALSE)))

nrow(SS3A)
SS3A <- subset(SS3A, SS3A$par.misreport > qlogis(0.005) & SS3A$par.misreport < qlogis(0.995) &
                     SS3A$se.misreport < 1 & !apply(SS3A, 1, function(x) any(is.na(x)))) # Remove degenerate solutions (rare, but can occur when very few misreport)
nrow(SS3A)

SS3A$low.misreport <- apply(SS3A[, c("par.misreport", "se.misreport")], 1, function(x) plogis(x[1] - x[2] * 1.645))
SS3A$high.misreport <- apply(SS3A[, c("par.misreport", "se.misreport")], 1, function(x) plogis(x[1] + x[2] * 1.645))

G3A <- data.frame(proportion = rep(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), 3),
                  n = rep(c(2000, 3000, 5000), each = 9),
                  rmse = NA, bias = NA, coverage = NA)

for(n in c(2000, 3000, 5000)) {
  for(p in c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)) {
    G3A$rmse[G3A$proportion == p & G3A$n == n] <- getRMSE(SS3A$par.misreport[SS3A$proportion == p & SS3A$n == n])
    G3A$bias[G3A$proportion == p & G3A$n == n] <- mean(0 - SS3A$par.misreport[SS3A$proportion == p & SS3A$n == n])
    G3A$coverage[G3A$proportion == p & G3A$n == n] <- prop.table(table(SS3A$low.misreport[SS3A$proportion == p & SS3A$n == n] < 0.5 &
                                                                       SS3A$high.misreport[SS3A$proportion == p & SS3A$n == n] > 0.5))[2]
  }
}

graphRMSE <- ggplot(G3A, aes(x = proportion, y = rmse, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.0045, 0.6045), expand = FALSE) +
  scale_x_continuous(breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6), labels = c("0", "", "0.2", "", "0.4", "", "0.6")) +
  labs(x = "Proportion holding sensitive belief", y = "", title = "\nRMSE", color = "Sample size: ", size = "Sample size: ") +
  geom_line(show.legend = TRUE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(1.25, 1),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(color = guide_legend(override.aes = list(shape = 21, size = 1)))

graphBias <- ggplot(G3A, aes(x = proportion, y = bias, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(-0.203, 0.2003), breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  labs(x = "Proportion holding sensitive belief", y = "", title = "\nBias") +
  geom_hline(yintercept = 0, linetype = 3) +
  geom_line(show.legend = FALSE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphCoverage <- ggplot(G3A, aes(x = proportion, y = coverage, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(0.799, 1.001), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(0.803, 1.003), breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Proportion holding sensitive belief", y = "", title = "Coverage of 90%\nconfidence interval") +
  geom_hline(yintercept = 0.9, linetype = 3) +
  geom_line(show.legend = FALSE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

# FIGURE 3 (TOP PANEL)
pdf(paste0(outputDirectory, "/EADY-FIGURE3-SimStudy3A-raw.pdf"), 6, 2.4)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 3)
dev.off()


########## SIMULATION STUDY 3B

dataList <- sort(grep("Sim3B", list.files(path = "SimStudyData/"), ignore.case = FALSE, value = TRUE), decreasing = TRUE)
SS3B <- do.call(rbind, lapply(dataList, function(x) read.csv(paste0("SimStudyData/", x), stringsAsFactors = FALSE)))

nrow(SS3B)
SS3B <- subset(SS3B, SS3B$par.misreport > qlogis(0.005) & SS3B$par.misreport < qlogis(0.995) &
                     SS3B$se.misreport < 1 & !apply(SS3B, 1, function(x) any(is.na(x)))) # Remove degenerate solutions (rare, but can occur when very few misreport)
nrow(SS3B)

SS3B$low.misreport <- apply(SS3B[, c("par.misreport", "se.misreport")], 1, function(x) plogis(x[1] - x[2] * 1.645))
SS3B$high.misreport <- apply(SS3B[, c("par.misreport", "se.misreport")], 1, function(x) plogis(x[1] + x[2] * 1.645))

G3B <- data.frame(proportion = rep(c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), 3),
                  n = rep(c(2000, 3000, 5000), each = 9),
                  rmse = NA, bias = NA, coverage = NA)

for(n in c(2000, 3000, 5000)) {
  for(p in c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)) {
    G3B$rmse[G3B$proportion == p & G3B$n == n] <- getRMSE(SS3B$par.misreport[SS3B$proportion == p & SS3B$n == n])
    G3B$bias[G3B$proportion == p & G3B$n == n] <- mean(qlogis(p) - SS3B$par.misreport[SS3B$proportion == p & SS3B$n == n])
    G3B$coverage[G3B$proportion == p & G3B$n == n] <- prop.table(table(SS3B$low.misreport[SS3B$proportion == p & SS3B$n == n] < p &
                                                                       SS3B$high.misreport[SS3B$proportion == p & SS3B$n == n] > p))[2]
  }
}

graphRMSE <- ggplot(G3B, aes(x = proportion, y = rmse, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.003, 0.503), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(-0.002, 0.5002), breaks = c(0, 0.1, 0.2, 0.3, 0.4, 0.5), labels = c("0", "0.1", "0.2", "0.3", "0.4", "0.5")) +
  labs(x = "Proportion that misreport", y = "", title = "\nRMSE", color = "Sample size: ", size = "Sample size: ") +
  geom_line(show.legend = FALSE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(1.25, 1),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(color = guide_legend(override.aes = list(shape = 21, size = 1)))

graphBias <- ggplot(G3B, aes(x = proportion, y = bias, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(-0.203, 0.203), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(-0.103, 0.1003), breaks = c(-0.2, -0.1, 0, 0.1, 0.2), labels = c("-0.2", "", "0", "", "0.2")) +
  labs(x = "Proportion that misreport", y = "", title = "\nBias") +
  geom_hline(yintercept = 0, linetype = 3) +
  geom_line(show.legend = FALSE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

graphCoverage <- ggplot(G3B, aes(x = proportion, y = coverage, color = as.factor(n), size = as.factor(n))) +
  my.theme(borderless = 0, base_size = 9, bordersize = 1) +
  coord_cartesian(xlim = c(0.096, 0.904), ylim = c(0.799, 1.001), expand = FALSE) +
  scale_x_continuous(limits = c(0.1, 0.904), breaks = seq(0.1, 0.9, by = 0.1), labels = c("", "0.2", "", "0.4", "", "0.6", "", "0.8", "")) +
  scale_y_continuous(limits = c(0.803, 1.003), breaks = c(0.8, 0.85, 0.9, 0.95, 1), labels = c("0.8", "", "0.9", "", "1")) +
  labs(x = "Proportion that misreport", y = "", title = "Coverage of 90%\nconfidence interval") +
  geom_hline(yintercept = 0.9, linetype = 3) +
  geom_line(show.legend = FALSE) +
  scale_size_manual(values = c(0.25, 0.3, 0.4)) +
  scale_color_manual(values = c("grey85", "grey60", "black")) +
  theme(legend.position = c(0.75, 1.3),
        legend.background = element_blank(),
        plot.margin = unit(c(0.25, 0.5, 1.25, -0.75), "lines")) +
  guides(shape = guide_legend(override.aes = list(size = 2.75)))

# FIGURE 3 (LOWER PANEL)
pdf(paste0(outputDirectory, "/EADY-FIGURE3-SimStudy3B-raw.pdf"), 6, 2.4)
grid.arrange(graphRMSE, graphBias, graphCoverage, ncol = 3)
dev.off()

